For the reader: You can click the black “code” box on the right hand side to either see or hide the R code behind analysis results and graphs.
The results are arranged to separate tabs by site/area, so that it’s easy to look at just the results of the site of interest, instead of scrolling accidentally down to a different site and getting confused.
The results for each site consist of four parts: pXRF, particle size distribution, loss on ignition, and colorimetry.
Intro about the site
From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:
Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)
The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.
# Libraries
library(openxlsx);
library(dplyr);
library(ggfortify); # PCA autoplots
library(FactoMineR); # Fast PCA graphs
library(dendextend); # Dendrograms
# Read the raw data
pxrf <- read.xlsx("data/pxrf_AY.xlsx", sep=";")
# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))
# Remove unused columns
pxrf <- pxrf %>%
select(-Site) %>%
select(-'Other;name') %>%
select(-'File;#') %>%
select(-DateTime) %>%
select(-Application) %>%
select(-Method) %>%
select(-ElapsedTime) %>%
select(-'Cal;Check')
# Coercing "< LOD" -results to NA:s
pxrf[4:93] <- pxrf[4:93] %>% mutate_if(is.character,as.numeric)
# Transforming NA values to zeros
pxrf[is.na(pxrf)] <- 0
# Averaging results by "Sample", "Area" and "Type" columns
pxrf_averaged <- aggregate(pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)
# Removing old (now empty) columns
pxrf_averaged <- pxrf_averaged %>% select(-Sample)
pxrf_averaged <- pxrf_averaged %>% select(-Area)
pxrf_averaged <- pxrf_averaged %>% select(-Type)
# Assigning sample names as new row names
rownames(pxrf_averaged) <- pxrf_averaged$Group.1
pxrf_averaged <- pxrf_averaged %>% select(-Group.1)
# Renaming the other two newly created groups back to "Type" and "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.2"] <- "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.3"] <- "Type"
# Removing error columns from the scaled analysis data set
pxrf_scaled <- pxrf_averaged %>% select(-ends_with("Err"))
# Scaling the data with standard z-score method
pxrf_scaled[3:47] <- scale(pxrf_scaled[3:47])
# Removing columns that only contain NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- pxrf_scaled %>% select_if(all_na)
pxrf_all: All possible elements (the numbers below are the amount of NA values)
colSums(is.na(pxrf_all[-c(1:2)]))
## MgO Al2O3 SiO2 P2O5 S Cl K2O CaO Ti V Cr Mn Fe
## 0 0 0 0 0 0 0 0 0 0 0 0 0
## Ni Cu Zn As Rb Sr Y Zr Nb Mo Rh Ag Sn
## 0 0 0 0 0 0 0 0 0 0 0 0 0
## Ba La Ta Pb Bi
## 0 0 0 0 0
pxrf_final: Final analysis data set with only the selected elements
# Dropping the dubious elements and elements with too many NA values
pxrf_final <- pxrf_all %>%
select(-Al2O3) %>%
select(-CaO) %>%
select(-SiO2) %>%
select(-Cl) %>%
select(-K2O) %>%
select(-Ni) %>%
select(-P2O5) %>%
select(-S) %>%
select(-As) %>%
select(-Nb) %>%
select(-Ba) %>%
select(-MgO) %>%
select(-Cu) %>%
select(-Cr) %>%
select(-Mo) %>%
select(-Rh) %>%
select(-Ag) %>%
select(-Sn) %>%
select(-La) %>%
select(-Ta) %>%
select(-Pb) %>%
select(-Bi)
# Transforming remaining NA values to zeros for analysis
pxrf_final[is.na(pxrf_final)] <- 0
# Final analysis data set
colnames(pxrf_final[-c(1:2)])
## [1] "Ti" "V" "Mn" "Fe" "Zn" "Rb" "Sr" "Y" "Zr"
# PC autoplot with K-means clusters
set.seed(1)
autoplot(kmeans(pxrf_final[3:10], 7), data = pxrf_final, label = TRUE, label.size = 3)
PCA with final elements:
# Elements
colnames(pxrf_final[-c(1:2)])
## [1] "Ti" "V" "Mn" "Fe" "Zn" "Rb" "Sr" "Y" "Zr"
# PCA analysis
pca_1 <- prcomp(pxrf_final[3:10])
summary(pca_1)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.2132 1.0303 0.89076 0.73069 0.56172 0.49807 0.30664
## Proportion of Variance 0.6123 0.1327 0.09918 0.06674 0.03944 0.03101 0.01175
## Cumulative Proportion 0.6123 0.7450 0.84417 0.91090 0.95035 0.98136 0.99311
## PC8
## Standard deviation 0.23478
## Proportion of Variance 0.00689
## Cumulative Proportion 1.00000
# PCA plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Ashdod-Yam elements")
autoplot(pca_1, data=pxrf_final, colour='Area', shape = FALSE, label = TRUE, main = "PCA Ashdod-Yam grouped by area")
autoplot(pca_1, data=pxrf_final, colour='Type', shape = FALSE, label = TRUE, main = "PCA Ashdod-Yam grouped by sample type")
PCA(pxrf_final[3:10])
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 47 individuals, described by 8 variables
## *The results are available in the following objects:
##
## name description
## 1 "$eig" "eigenvalues"
## 2 "$var" "results for the variables"
## 3 "$var$coord" "coord. for the variables"
## 4 "$var$cor" "correlations variables - dimensions"
## 5 "$var$cos2" "cos2 for the variables"
## 6 "$var$contrib" "contributions of the variables"
## 7 "$ind" "results for the individuals"
## 8 "$ind$coord" "coord. for the individuals"
## 9 "$ind$cos2" "cos2 for the individuals"
## 10 "$ind$contrib" "contributions of the individuals"
## 11 "$call" "summary statistics"
## 12 "$call$centre" "mean of the variables"
## 13 "$call$ecart.type" "standard error of the variables"
## 14 "$call$row.w" "weights for the individuals"
## 15 "$call$col.w" "weights for the variables"
# HCA dendrogram, samples color coded by area:
dend <-
pxrf_final %>% # data
dist %>% # calculate a distance matrix
hclust(method = "ward.D2") %>% # hierarchical clustering
as.dendrogram %>% # turn the object into a dendrogram
highlight_branches_col
area <- as.factor(pxrf_final[, 1])
n_area <- length(unique(area))
cols_a <- colorspace::rainbow_hcl(n_area, c = 70, l = 50)
col_area <- cols_a[area]
labels_colors(dend) <- col_area[order.dendrogram(dend)]
plot(dend, main="HCA with sample areas (Copper included)")
rect.dendrogram(dend, 7, border = "Black", lty = 5)
legend("topright", legend = levels(area), fill = cols_a)
# HCA dendrogram, samples color coded by type:
dend3 <-
pxrf_final %>% # data
dist %>% # calculate a distance matrix
hclust(method = "ward.D2") %>% # hierarchical clustering
as.dendrogram %>% # turn the object into a dendrogram
highlight_branches_col
type <- as.factor(pxrf_final[, 2])
n_type <- length(unique(type))
cols_t <- colorspace::rainbow_hcl(n_type, c = 70, l = 50)
col_type <- cols_t[type]
labels_colors(dend3) <- col_type[order.dendrogram(dend3)]
plot(dend3, main="HCA with sample types (NO COPPER)")
rect.dendrogram(dend3, 6, border = "Black", lty = 5)
legend("topright", legend = levels(type), fill = cols_t)
# Libraries
library(dplyr);
library(openxlsx);
library(ggtern);
library(reshape2)
# Read and filter data
grain <- read.xlsx("data/grain_AY.xlsx", sep=";")
# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>%
filter(!grepl('Average', Sample)) %>%
filter(!grepl('test', Sample))
# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)
# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)
# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
names(grain)[names(grain) == "Group.1"] <- "Sample"
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"
# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}
# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[5:7], 2, normalize))
# Ternary plots
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
labs(title="Clay silt sand") +
theme_rgbw() +
theme_nomask() +
geom_point(size=0) +
geom_text(aes(label=rownames(grain_01)), size=3)
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
labs(title="Clay silt sand") +
theme_rgbw() +
theme_nomask() +
geom_point(size=2)
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Type)) +
labs(title="Clay silt sand") +
theme_nomask() +
geom_mask() +
geom_point(size=2)
# Reshaping data to long form
grain_long <- melt(grain[c(1,14:113)], id = "Sample")
ggplot(grain_long, aes(x=variable, y=value, color=Sample, group=Sample)) +
geom_line() +
ggtitle("Grain size distribution curves")
# Stacked barplot
grain_long2 <- melt(grain[c(1,9:13)], id = "Sample")
ggplot(grain_long2, aes(x = reorder(Sample, value), y = value, fill = variable)) +
geom_bar(stat = 'identity', position = 'fill') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.
# Libraries
library(openxlsx);
library(dplyr);
library(ggplot2);
library(ggrepel); # Preventing labels from overlapping
library(reshape2) # Melt-function for reshaping data to long form
loi <- read.xlsx("data/loi_AY.xlsx", sep=";")
tga <- read.xlsx("data/tga_AY.xlsx", sep=";")
# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))
# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)
# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name
# Creating new columns for analysis by subtracting crucible mass;
# "dry" (mass after drying), "c550" (mass after 550 C), "c950" (mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)
loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100
# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100
# Removing duplicates for clarity
loi <- subset(loi[1:47, ])
tga <- subset(tga[-c(18,20,47,49), ])
# Transforming data to long form
loi_long <- loi[c(8,13:14)]
loi_long <- melt(loi_long, id = "context")
tga_long <- tga[c(8,10:11)]
tga_long <- melt(tga_long, id = "context")
# Long form data for stacked barplots
loi_long2 <- melt(loi[c(1,13:14)], value.name = "Temp", id= "sample")
tga_long2 <- melt(tga[c(1,10:11)], value.name = "Temp", id= "Name")
LOI dataset:
loi[c(8:9,13:14)]
## context type c550 c950
## AY-1 Area D acropolis mudbrick 2.088275 4.8891890
## AY-2 Area D acropolis mudbrick 2.210488 1.9841768
## AY-3 Area D acropolis mudbrick 1.360910 2.2488113
## AY-4 Area D acropolis mudbrick 1.430588 1.8964316
## AY-5 Area D acropolis mudbrick 1.338035 3.3256238
## AY-6 Area D acropolis mud plaster 1.480312 1.2721627
## AY-7 Area D acropolis mudbrick 1.430335 3.1305245
## AY-8 Area D acropolis mud plaster 1.649521 1.9801623
## AY-9 Area D acropolis mudbrick 2.368712 3.9045130
## AY-10 Area D acropolis mudbrick 1.049666 3.1941032
## AY-11 Area D acropolis mudbrick 1.082732 1.9786697
## AY-12 Area D acropolis PEM 1.183502 0.7730823
## AY-13 Area D acropolis mudbrick 1.389532 4.5525165
## AY-14 Area D acropolis mudbrick 1.787980 2.7523295
## AY-15 Area D acropolis mudbrick 2.233289 2.7459408
## AY-16 Area D acropolis mudbrick 1.026364 0.9583722
## AY-17 Area D acropolis mudbrick 1.569395 3.5356312
## AY-18 Area D acropolis mudbrick 1.770410 2.6403398
## AY-19 Area D acropolis mudbrick 1.454623 3.3395017
## AY-20 Area D acropolis mud plaster 1.593025 1.5216091
## AY-21 Area D acropolis mudbrick 1.729351 1.7560317
## AY-22 Area D wall mudbrick 1.280392 1.5344272
## AY-23 Area D wall mudbrick 1.782016 1.9743301
## AY-24 Area D wall mudbrick 1.697649 1.1877784
## AY-25 Area D wall mudbrick 1.528936 2.7365019
## AY-26 Area D wall mudbrick 1.802970 3.6061442
## AY-27 Area D wall mud plaster 1.885674 1.5169278
## AY-28 Area D wall mudbrick 1.399589 0.7854046
## AY-29 Area D wall mud plaster 1.855100 2.0634749
## AY-30 Area D wall mudbrick 2.552046 3.4633045
## AY-31 Area D wall mudbrick 1.455348 3.4109094
## AY-32 Area D wall mudbrick 1.244529 1.4577882
## AY-33 Area D wall mudbrick 1.300813 0.8936149
## AY-34 Area D wall mudbrick 1.581909 1.7044229
## AY-35 Area D wall mudbrick 1.931867 2.6061513
## AY-36 Area D wall mudbrick 2.422288 2.5205456
## AY-37 Area D wall mudbrick 1.139581 1.0607829
## AY-38 Area D wall mudbrick 2.130927 1.4162417
## AY-39 Area D wall mud mortar 1.904037 2.5000000
## AY-40 Area D wall mud plaster 2.482920 2.5301253
## AY-41 Area D wall mudbrick 2.565292 3.7553575
## AY-42 Hellenistic area A mudbrick 1.593653 1.9123424
## AY-50 Builder's floor PEM 1.012178 1.5457741
## AY-51 Builder's floor PEM 1.212985 1.2668590
## AY-52 Builder's floor PEM 1.763143 2.3906130
## AY-53 Builder's floor PEM 1.402188 8.0417546
## AY-54 Builder's floor PEM 1.843655 2.9683284
TGA dataset:
tga[c(8:9,10:11)]
## context type c550 c950
## AY-1 Area D acropolis mudbrick 2.398013 2.4471417
## AY-2 Area D acropolis mudbrick 1.736874 7.9947176
## AY-4 Area D acropolis mudbrick 1.929141 1.4280310
## AY-3 Area D acropolis mudbrick 1.233825 3.4734918
## AY-5 Area D acropolis mudbrick 1.644479 3.5987261
## AY-7 Area D acropolis mudbrick 1.505973 3.4082173
## AY-8 Area D acropolis mud plaster 1.558115 3.6606406
## AY-9 Area D acropolis mudbrick 1.927070 3.2099602
## AY-10 Area D acropolis mudbrick 1.176203 3.8700760
## AY-11 Area D acropolis mudbrick 1.248990 1.3615058
## AY-12 Area D acropolis PEM 1.274945 0.7860752
## AY-13 Area D acropolis mudbrick 1.281796 3.7069429
## AY-14 Area D acropolis mudbrick 1.971081 2.2591414
## AY-15 Area D acropolis mudbrick 2.647777 2.4588519
## AY-16 Area D acropolis mudbrick 1.329581 3.4621816
## AY-17 Area D acropolis mudbrick 1.631206 4.5833762
## AY-18 Area D acropolis mudbrick 1.698302 3.6382114
## AY-19 Area D acropolis mudbrick 1.874606 3.2830310
## AY-20 Area D acropolis mud plaster 1.877608 2.0755290
## AY-21 Area D acropolis mudbrick 1.413217 1.8372703
## AY-22 Area D wall mudbrick 2.099634 2.0131512
## AY-23 Area D wall mudbrick 1.749414 1.3528300
## AY-24 Area D wall mudbrick 1.711251 1.4208525
## AY-25 Area D wall mudbrick 1.938736 1.9671807
## AY-26 Area D wall mudbrick 1.663242 3.9001094
## AY-27 Area D wall mud plaster 1.696577 1.6560255
## AY-28 Area D wall mudbrick 1.259601 0.7233976
## AY-29 Area D wall mud plaster 1.724623 3.9961850
## AY-30 Area D wall mudbrick 2.586865 3.2631063
## AY-31 Area D wall mudbrick 1.343570 3.0447471
## AY-32 Area D wall mudbrick 1.336629 1.4660852
## AY-33 Area D wall mudbrick 1.268116 0.8460754
## AY-34 Area D wall mudbrick 1.692888 1.4506317
## AY-35 Area D wall mudbrick 2.002379 2.3973296
## AY-6 Area D acropolis mud plaster 1.659626 1.2887389
## AY-36 Area D wall mudbrick 2.500488 2.3943098
## AY-37 Area D wall mudbrick 1.077605 0.7994378
## AY-38 Area D wall mudbrick 2.164785 1.6976633
## AY-39 Area D wall mud mortar 1.915888 2.8108623
## AY-40 Area D wall mud plaster 2.618731 2.4977211
## AY-42 Hellenistic area A mudbrick 1.655597 1.5855926
## AY-50 Builder's floor PEM 1.347992 1.5505378
## AY-51 Builder's floor PEM 1.282281 1.6713598
## AY-52 Builder's floor PEM 1.492264 2.2490706
## AY-53 Builder's floor PEM 1.276679 5.4590326
## AY-54 Builder's floor PEM 1.651196 3.0674290
# Colored boxplots: Trad LOI
ggplot(loi_long, aes(x = variable, y = value, color = context)) +
geom_boxplot() +
labs(title="Traditional LOI",
x ="Temperature", y = "LOI")
# Colored boxplots: TGA LOI
ggplot(tga_long, aes(x = variable, y = value, color = context)) +
geom_boxplot() +
labs(title="Thermogravimeter LOI",
x ="Temperature", y = "LOI")
# Color by context biplot: Trad LOI
ggplot(loi,
aes(c550, c950, label = rownames(loi), colour = factor(context))) +
geom_point(size=2, aes(shape = factor(type))) +
geom_text_repel(size=3) +
labs(title = "Traditional LOI - color by context",
color = "Context",
x = "Organic LOI (%)",
y = "Carbonate LOI (%)") +
theme(axis.title = element_text())
# Color by context biplot: TGA
ggplot(tga,
aes(c550, c950, label = rownames(tga), colour = factor(context))) +
geom_point(size=2, aes(shape = factor(type))) +
geom_text_repel(size=3) +
labs(title = "TGA LOI - color by context",
color = "Context",
x = "Organic LOI (%)",
y = "Carbonate LOI (%)") +
theme(axis.title = element_text())
# Trad LOI stacked barplot
ggplot(loi_long2, aes(x = reorder(sample, Temp), y = Temp, fill = variable)) +
geom_bar(stat = 'identity', position = 'stack') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
labs(title="Traditional LOI",
fill="Temperature",
x = "",
y = "LOI (%)")
# TGA LOI stacked barplot
ggplot(tga_long2, aes(x = reorder(Name, Temp), y = Temp, fill = variable)) +
geom_bar(stat = 'identity', position = 'stack') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
labs(title="Thermogravimeter LOI",
fill="Temperature",
x = "",
y = "LOI (%)")
# Libraries
library(dplyr);
library(ggplot2);
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)
color1 <- read.table("data/color_AY.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
## L a b
## Min. :37.56 Min. : 7.97 Min. :16.81
## 1st Qu.:42.13 1st Qu.: 9.72 1st Qu.:19.76
## Median :43.64 Median :10.38 Median :21.18
## Mean :43.92 Mean :10.49 Mean :21.22
## 3rd Qu.:45.91 3rd Qu.:11.29 3rd Qu.:22.34
## Max. :48.24 Max. :13.53 Max. :25.03
ggplot(color,
aes(a, b, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "Color (a*b)") +
xlab("a D65") +
ylab("b D65")
ggplot(color1,
aes(X, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "Luminance") +
ylab("L* (D65)") +
xlab("Sample number")
ggplot(color,
aes(a, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "A versus luminance") +
ylab("L* (D65)") +
xlab("a")
ggplot(color,
aes(b, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "B versus luminance") +
ylab("L* (D65)") +
xlab("b")
# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L,
xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
phi = 0, bty = "g", type = "h",
ticktype = "detailed", pch = 19, cex = 1.2)
text3D(color$a, color$b, color$L, labels = rownames(color),
add = TRUE, colkey = FALSE, cex = 0.7)
scatter3D(color$L, color$a, color$b,
xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
phi = 0, bty = "g", type = "h",
ticktype = "detailed", pch = 19, cex = 1.2)
text3D(color$L, color$a, color$b, labels = rownames(color),
add = TRUE, colkey = FALSE, cex = 0.7)
Intro about the site
From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:
Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)
The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.
# Libraries
library(openxlsx);
library(dplyr);
library(ggfortify); # PCA autoplots
library(FactoMineR); # Fast PCA graphs
library(dendextend); # Dendrograms
# Read the raw data
pxrf <- read.xlsx("data/pxrf_AYB.xlsx", sep=";")
# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))
# Remove unused columns
pxrf <- pxrf %>%
select(-Site) %>%
select(-'Other;name') %>%
select(-'File;#') %>%
select(-DateTime) %>%
select(-Application) %>%
select(-Method) %>%
select(-ElapsedTime) %>%
select(-'Cal;Check')
# Coercing "< LOD" -results to NA:s
pxrf[4:93] <- pxrf[4:93] %>% mutate_if(is.character,as.numeric)
# Transforming NA values to zeros
pxrf[is.na(pxrf)] <- 0
# Averaging results by "Sample", "Area" and "Type" columns
pxrf_averaged <- aggregate(pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)
# Removing old (now empty) columns
pxrf_averaged <- pxrf_averaged %>% select(-Sample)
pxrf_averaged <- pxrf_averaged %>% select(-Area)
pxrf_averaged <- pxrf_averaged %>% select(-Type)
# Assigning sample names as new row names
rownames(pxrf_averaged) <- pxrf_averaged$Group.1
pxrf_averaged <- pxrf_averaged %>% select(-Group.1)
# Renaming the other two newly created groups back to "Type" and "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.2"] <- "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.3"] <- "Type"
# Removing error columns from the scaled analysis data set
pxrf_scaled <- pxrf_averaged %>% select(-ends_with("Err"))
# Scaling the data with standard z-score method
pxrf_scaled[3:47] <- scale(pxrf_scaled[3:47])
# Removing columns that only contain NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- pxrf_scaled %>% select_if(all_na)
pxrf_averaged: All averaged data including averaged errors (Uine’s note: for later use)
colnames(pxrf_averaged)
## [1] "Area" "Type" "MgO" "MgO;Err" "Al2O3" "Al2O3;Err"
## [7] "SiO2" "SiO2;Err" "P2O5" "P2O5;Err" "S" "S;Err"
## [13] "Cl" "Cl;Err" "K2O" "K2O;Err" "CaO" "CaO;Err"
## [19] "Ti" "Ti;Err" "V" "V;Err" "Cr" "Cr;Err"
## [25] "Mn" "Mn;Err" "Fe" "Fe;Err" "Co" "Co;Err"
## [31] "Ni" "Ni;Err" "Cu" "Cu;Err" "Zn" "Zn;Err"
## [37] "As" "As;Err" "Se" "Se;Err" "Rb" "Rb;Err"
## [43] "Sr" "Sr;Err" "Y" "Y;Err" "Zr" "Zr;Err"
## [49] "Nb" "Nb;Err" "Mo" "Mo;Err" "Rh" "Rh;Err"
## [55] "Pd" "Pd;Err" "Ag" "Ag;Err" "Cd" "Cd;Err"
## [61] "Sn" "Sn;Err" "Sb" "Sb;Err" "Ba" "Ba;Err"
## [67] "La" "La;Err" "Ce" "Ce;Err" "Hf" "Hf;Err"
## [73] "Ta" "Ta;Err" "W" "W;Err" "Pt" "Pt;Err"
## [79] "Au" "Au;Err" "Hg" "Hg;Err" "Tl" "Tl;Err"
## [85] "Pb" "Pb;Err" "Bi" "Bi;Err" "Th" "Th;Err"
## [91] "U" "U;Err"
pxrf_all: All possible elements (the numbers below are the amount of NA values)
colSums(is.na(pxrf_all[-c(1:2)]))
## MgO Al2O3 SiO2 P2O5 S Cl K2O CaO Ti V Cr Mn Fe
## 0 0 0 0 0 0 0 0 0 0 0 0 0
## Ni Cu Zn As Rb Sr Y Zr Nb Mo Ag Sn Ba
## 0 0 0 0 0 0 0 0 0 0 0 0 0
## Ta Pb
## 0 0
pxrf_final: Final analysis data set with only the selected elements
# Dropping the dubious elements and elements with too many NA values
pxrf_final <- pxrf_all %>%
select(-Al2O3) %>%
select(-CaO) %>%
select(-SiO2) %>%
select(-Cl) %>%
select(-K2O) %>%
select(-Ni) %>%
select(-P2O5) %>%
select(-S) %>%
select(-As) %>%
select(-Pb) %>%
select(-MgO) %>%
select(-Cu) %>%
select(-Cr) %>%
select(-Nb) %>%
select(-Mo) %>%
select(-Ag) %>%
select(-Sn) %>%
select(-Ba) %>%
select(-Ta)
# Transforming remaining NA values to zeros for analysis
pxrf_final[is.na(pxrf_final)] <- 0
# Final analysis data set
colnames(pxrf_final[-c(1:2)])
## [1] "Ti" "V" "Mn" "Fe" "Zn" "Rb" "Sr" "Y" "Zr"
# PC autoplot with K-means clusters
set.seed(1)
autoplot(kmeans(pxrf_final[3:10], 4), data = pxrf_final, label = TRUE, label.size = 3)
PCA with final elements:
# Elements
colnames(pxrf_final[-c(1:2)])
## [1] "Ti" "V" "Mn" "Fe" "Zn" "Rb" "Sr" "Y" "Zr"
# PCA analysis
pca_1 <- prcomp(pxrf_final[3:10])
summary(pca_1)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.2053 1.3764 0.85824 0.56195 0.40409 0.16327 9.454e-17
## Proportion of Variance 0.6079 0.2368 0.09207 0.03947 0.02041 0.00333 0.000e+00
## Cumulative Proportion 0.6079 0.8447 0.93678 0.97626 0.99667 1.00000 1.000e+00
# PCA plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Ashdod-Yam Byzantine elements")
autoplot(pca_1, data=pxrf_final, colour='Type', shape = FALSE, label = TRUE, main = "PCA Ashdod-Yam Byzantine grouped by sample type")
PCA(pxrf_final[3:10])
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 7 individuals, described by 8 variables
## *The results are available in the following objects:
##
## name description
## 1 "$eig" "eigenvalues"
## 2 "$var" "results for the variables"
## 3 "$var$coord" "coord. for the variables"
## 4 "$var$cor" "correlations variables - dimensions"
## 5 "$var$cos2" "cos2 for the variables"
## 6 "$var$contrib" "contributions of the variables"
## 7 "$ind" "results for the individuals"
## 8 "$ind$coord" "coord. for the individuals"
## 9 "$ind$cos2" "cos2 for the individuals"
## 10 "$ind$contrib" "contributions of the individuals"
## 11 "$call" "summary statistics"
## 12 "$call$centre" "mean of the variables"
## 13 "$call$ecart.type" "standard error of the variables"
## 14 "$call$row.w" "weights for the individuals"
## 15 "$call$col.w" "weights for the variables"
# HCA dendrogram, samples color coded by type:
dend <-
pxrf_final %>% # data
dist %>% # calculate a distance matrix
hclust(method = "ward.D2") %>% # hierarchical clustering
as.dendrogram %>% # turn the object into a dendrogram
highlight_branches_col
type <- as.factor(pxrf_final[, 2])
n_type <- length(unique(type))
cols_t <- colorspace::rainbow_hcl(n_type, c = 70, l = 50)
col_type <- cols_t[type]
labels_colors(dend) <- col_type[order.dendrogram(dend)]
plot(dend, main="HCA with sample types (Copper included)")
rect.dendrogram(dend, 5, border = "Black", lty = 5)
legend("topright", legend = levels(type), fill = cols_t)
# Libraries
library(dplyr);
library(openxlsx);
library(ggtern);
library(reshape2)
# Read and filter data
grain <- read.xlsx("data/grain_AYB.xlsx", sep=";")
# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>%
filter(!grepl('Average', Sample)) %>%
filter(!grepl('test', Sample))
# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)
# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)
# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
names(grain)[names(grain) == "Group.1"] <- "Sample"
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"
# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}
# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[5:7], 2, normalize))
# Ternary plots
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
labs(title="Clay silt sand") +
theme_rgbw() +
theme_nomask() +
geom_point(size=0) +
geom_text(aes(label=rownames(grain_01)), size=3)
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Type)) +
labs(title="Clay silt sand") +
theme_nomask() +
geom_mask() +
geom_point(size=2)
# Reshaping data to long form
grain_long <- melt(grain[c(1,14:113)], id = "Sample")
ggplot(grain_long, aes(x=variable, y=value, color=Sample, group=Sample)) +
geom_line() +
ggtitle("Grain size distribution curves")
# Stacked barplot
grain_long2 <- melt(grain[c(1,9:13)], id = "Sample")
ggplot(grain_long2, aes(x = reorder(Sample, value), y = value, fill = variable)) +
geom_bar(stat = 'identity', position = 'fill') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.
# Libraries
library(openxlsx);
library(dplyr);
library(ggplot2);
library(ggrepel); # Preventing labels from overlapping
library(reshape2) # Melt-function for reshaping data to long form
loi <- read.xlsx("data/loi_AYB.xlsx", sep=";")
tga <- read.xlsx("data/tga_AYB.xlsx", sep=";")
# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))
# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)
# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name
# Creating new columns for analysis by subtracting crucible mass;
# "dry" (mass after drying), "c550" (mass after 550 C), "c950" (mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)
loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100
# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100
# Transforming data to long form
loi_long <- loi[c(8,13:14)]
loi_long <- melt(loi_long, id = "context")
tga_long <- tga[c(8,10:11)]
tga_long <- melt(tga_long, id = "context")
# Long form data for stacked barplots
loi_long2 <- melt(loi[c(1,13:14)], value.name = "Temp", id= "sample")
tga_long2 <- melt(tga[c(1,10:11)], value.name = "Temp", id= "Name")
LOI dataset:
loi[c(8:9,13:14)]
## context type c550 c950
## AY-43 Byzantine ceiling 2.8231692 13.606389
## AY-44 Byzantine floor 3.1549254 4.143032
## AY-45 Byzantine PEM (burnt) 9.7500231 6.336877
## AY-46 Byzantine PEM 3.0153992 1.087679
## AY-47 Byzantine ceiling 2.2356517 6.463715
## AY-48 Byzantine ceiling 1.8207856 15.544905
## AY-49 Byzantine PEM 0.4835757 32.404290
TGA dataset:
tga[c(8:9,10:11)]
## context type c550 c950
## AY-43 Byzantine ceiling 3.2957418 12.043832
## AY-44 Byzantine floor 4.3270582 8.653940
## AY-45 Byzantine PEM (burnt) 6.2941554 9.781651
## AY-46 Byzantine PEM 3.8585514 6.813924
## AY-47 Byzantine ceiling 2.0075863 7.902190
## AY-48 Byzantine ceiling 1.8409714 11.731844
## AY-49 Byzantine PEM 0.3230349 29.724115
# Colored boxplots: Trad LOI
ggplot(loi_long, aes(x = variable, y = value, color = variable)) +
geom_boxplot() +
labs(title="Traditional LOI",
x ="Temperature", y = "LOI")
# Colored boxplots: TGA LOI
ggplot(tga_long, aes(x = variable, y = value, color = variable)) +
geom_boxplot() +
labs(title="Thermogravimeter LOI",
x ="Temperature", y = "LOI")
# Color by context biplot: Trad LOI
ggplot(loi,
aes(c550, c950, label = rownames(loi), colour = factor(type))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "Traditional LOI - color by type",
color = "Type",
x = "Organic LOI (%)",
y = "Carbonate LOI (%)") +
theme(axis.title = element_text())
# Color by context biplot: TGA
ggplot(tga,
aes(c550, c950, label = rownames(tga), colour = factor(type))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "TGA LOI - color by type",
color = "Type",
x = "Organic LOI (%)",
y = "Carbonate LOI (%)") +
theme(axis.title = element_text())
# Trad LOI stacked barplot
ggplot(loi_long2, aes(x = reorder(sample, Temp), y = Temp, fill = variable)) +
geom_bar(stat = 'identity', position = 'stack') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
labs(title="Traditional LOI",
fill="Temperature",
x = "",
y = "LOI (%)")
# TGA LOI stacked barplot
ggplot(tga_long2, aes(x = reorder(Name, Temp), y = Temp, fill = variable)) +
geom_bar(stat = 'identity', position = 'stack') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
labs(title="Thermogravimeter LOI",
fill="Temperature",
x = "",
y = "LOI (%)")
# Libraries
library(dplyr);
library(ggplot2);
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)
color1 <- read.table("data/color_AYB.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
## L a b
## Min. :34.56 Min. : 2.480 Min. : 6.98
## 1st Qu.:38.93 1st Qu.: 5.215 1st Qu.:12.73
## Median :45.30 Median : 5.540 Median :13.28
## Mean :46.69 Mean : 6.211 Mean :14.56
## 3rd Qu.:50.95 3rd Qu.: 6.675 3rd Qu.:16.21
## Max. :67.20 Max. :11.680 Max. :23.75
ggplot(color,
aes(a, b, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "Color (a*b)") +
xlab("a D65") +
ylab("b D65")
ggplot(color1,
aes(X, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "Luminance") +
ylab("L* (D65)") +
xlab("Sample number")
ggplot(color,
aes(a, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "A versus luminance") +
ylab("L* (D65)") +
xlab("a")
ggplot(color,
aes(b, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "B versus luminance") +
ylab("L* (D65)") +
xlab("b")
# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L,
xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
phi = 0, bty = "g", type = "h",
ticktype = "detailed", pch = 19, cex = 1.2)
text3D(color$a, color$b, color$L, labels = rownames(color),
add = TRUE, colkey = FALSE, cex = 0.7)
scatter3D(color$L, color$a, color$b,
xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
phi = 0, bty = "g", type = "h",
ticktype = "detailed", pch = 19, cex = 1.2)
text3D(color$L, color$a, color$b, labels = rownames(color),
add = TRUE, colkey = FALSE, cex = 0.7)
Intro about the site
From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:
Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)
The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.
# Libraries
library(openxlsx);
library(dplyr);
library(ggfortify); # PCA autoplots
library(FactoMineR); # Fast PCA graphs
library(dendextend); # Dendrograms
# Read the raw data
pxrf <- read.xlsx("data/pxrf_israel.xlsx", sep=";")
# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))
# Remove unused columns
pxrf <- pxrf %>%
select(-Site) %>%
select(-'Other;name') %>%
select(-'File;#') %>%
select(-DateTime) %>%
select(-Application) %>%
select(-Method) %>%
select(-ElapsedTime) %>%
select(-'Cal;Check')
# Coercing "< LOD" -results to NA:s
pxrf[4:93] <- pxrf[4:93] %>% mutate_if(is.character,as.numeric)
# Transforming NA values to zeros
pxrf[is.na(pxrf)] <- 0
# Averaging results by "Sample", "Area" and "Type" columns
pxrf_averaged <- aggregate(pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)
# Removing old (now empty) columns
pxrf_averaged <- pxrf_averaged %>% select(-Sample)
pxrf_averaged <- pxrf_averaged %>% select(-Area)
pxrf_averaged <- pxrf_averaged %>% select(-Type)
# Assigning sample names as new row names
rownames(pxrf_averaged) <- pxrf_averaged$Group.1
pxrf_averaged <- pxrf_averaged %>% select(-Group.1)
# Renaming the other two newly created groups back to "Type" and "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.2"] <- "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.3"] <- "Type"
# Removing error columns from the scaled analysis data set
pxrf_scaled <- pxrf_averaged %>% select(-ends_with("Err"))
# Scaling the data with standard z-score method
pxrf_scaled[3:47] <- scale(pxrf_scaled[3:47])
# Removing columns that only contain NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- pxrf_scaled %>% select_if(all_na)
pxrf_all: All possible elements (the numbers below are the amount of NA values)
colSums(is.na(pxrf_all[-c(1:2)]))
## MgO Al2O3 SiO2 P2O5 S Cl K2O CaO Ti V Cr Mn Fe
## 0 0 0 0 0 0 0 0 0 0 0 0 0
## Ni Cu Zn As Se Rb Sr Y Zr Nb Mo Ag Sn
## 0 0 0 0 0 0 0 0 0 0 0 0 0
## Ba La
## 0 0
pxrf_final: Final analysis data set with only the selected elements
# Dropping the dubious elements and elements with too many NA values
pxrf_final <- pxrf_all %>%
select(-Al2O3) %>%
select(-CaO) %>%
select(-SiO2) %>%
select(-Cl) %>%
select(-K2O) %>%
select(-Ni) %>%
select(-P2O5) %>%
select(-S) %>%
select(-Nb) %>%
select(-MgO) %>%
select(-Cu) %>%
select(-Cr) %>%
select(-As) %>%
select(-Se) %>%
select(-Mo) %>%
select(-Ag) %>%
select(-Sn) %>%
select(-Ba) %>%
select(-La)
# Transforming remaining NA values to zeros for analysis
pxrf_final[is.na(pxrf_final)] <- 0
# Final analysis data set
colnames(pxrf_final[-c(1:2)])
## [1] "Ti" "V" "Mn" "Fe" "Zn" "Rb" "Sr" "Y" "Zr"
pxrf_MB: Final analysis data set for MUDBRICK samples only (= no floor or lime plaster)
pxrf_MB <- pxrf_final[-c(1:7), ]
rownames(pxrf_MB)
## [1] "AH-4" "AH-5" "RLZ-1" "TI-1" "TI-10" "TI-2" "TI-3" "TI-4" "TI-5"
## [10] "TI-6" "TI-7" "TI-8"
# PC autoplot with K-means clusters
set.seed(1)
autoplot(kmeans(pxrf_final[3:10], 7), data = pxrf_final, label = TRUE, label.size = 3)
PCA with final elements:
# Elements
colnames(pxrf_final[-c(1:2)])
## [1] "Ti" "V" "Mn" "Fe" "Zn" "Rb" "Sr" "Y" "Zr"
# PCA analysis
pca_1 <- prcomp(pxrf_final[3:10])
summary(pca_1)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.214 1.0368 0.9836 0.69250 0.60125 0.33515 0.28885
## Proportion of Variance 0.613 0.1344 0.1209 0.05994 0.04519 0.01404 0.01043
## Cumulative Proportion 0.613 0.7474 0.8683 0.92824 0.97343 0.98747 0.99790
## PC8
## Standard deviation 0.1298
## Proportion of Variance 0.0021
## Cumulative Proportion 1.0000
# PCA plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Israel elements")
autoplot(pca_1, data=pxrf_final, colour='Area', shape = FALSE, label = TRUE, main = "PCA Israel grouped by area")
autoplot(pca_1, data=pxrf_final, colour='Type', shape = FALSE, label = TRUE, main = "PCA Israel grouped by sample type")
PCA(pxrf_final[3:10])
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 19 individuals, described by 8 variables
## *The results are available in the following objects:
##
## name description
## 1 "$eig" "eigenvalues"
## 2 "$var" "results for the variables"
## 3 "$var$coord" "coord. for the variables"
## 4 "$var$cor" "correlations variables - dimensions"
## 5 "$var$cos2" "cos2 for the variables"
## 6 "$var$contrib" "contributions of the variables"
## 7 "$ind" "results for the individuals"
## 8 "$ind$coord" "coord. for the individuals"
## 9 "$ind$cos2" "cos2 for the individuals"
## 10 "$ind$contrib" "contributions of the individuals"
## 11 "$call" "summary statistics"
## 12 "$call$centre" "mean of the variables"
## 13 "$call$ecart.type" "standard error of the variables"
## 14 "$call$row.w" "weights for the individuals"
## 15 "$call$col.w" "weights for the variables"
PCA with only mudbrick samples:
# Elements
colnames(pxrf_MB[3:10])
## [1] "Ti" "V" "Mn" "Fe" "Zn" "Rb" "Sr" "Y"
# PCA analysis
pca_3 <- prcomp(pxrf_MB[3:10])
summary(pca_3)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.5233 1.1089 0.77291 0.54054 0.29671 0.24740 0.13783
## Proportion of Variance 0.7355 0.1421 0.06901 0.03375 0.01017 0.00707 0.00219
## Cumulative Proportion 0.7355 0.8775 0.94651 0.98026 0.99043 0.99750 0.99969
## PC8
## Standard deviation 0.05140
## Proportion of Variance 0.00031
## Cumulative Proportion 1.00000
# PCA plots
biplot(pca_3, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Israel MB elements")
autoplot(pca_3, data=pxrf_MB, colour='Area', shape = FALSE, label = TRUE, main = "PCA Israel MB grouped by area")
PCA(pxrf_MB[3:10])
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 12 individuals, described by 8 variables
## *The results are available in the following objects:
##
## name description
## 1 "$eig" "eigenvalues"
## 2 "$var" "results for the variables"
## 3 "$var$coord" "coord. for the variables"
## 4 "$var$cor" "correlations variables - dimensions"
## 5 "$var$cos2" "cos2 for the variables"
## 6 "$var$contrib" "contributions of the variables"
## 7 "$ind" "results for the individuals"
## 8 "$ind$coord" "coord. for the individuals"
## 9 "$ind$cos2" "cos2 for the individuals"
## 10 "$ind$contrib" "contributions of the individuals"
## 11 "$call" "summary statistics"
## 12 "$call$centre" "mean of the variables"
## 13 "$call$ecart.type" "standard error of the variables"
## 14 "$call$row.w" "weights for the individuals"
## 15 "$call$col.w" "weights for the variables"
# HCA dendrogram, samples color coded by area:
dend <-
pxrf_MB %>% # data
dist %>% # calculate a distance matrix
hclust(method = "ward.D2") %>% # hierarchical clustering
as.dendrogram %>% # turn the object into a dendrogram
highlight_branches_col
area <- as.factor(pxrf_MB[, 1])
n_area <- length(unique(area))
cols_a <- colorspace::rainbow_hcl(n_area, c = 70, l = 50)
col_area <- cols_a[area]
labels_colors(dend) <- col_area[order.dendrogram(dend)]
plot(dend, main="HCA with sample areas")
rect.dendrogram(dend, 4, border = "Black", lty = 5)
legend("topright", legend = levels(area), fill = cols_a)
# Libraries
library(dplyr);
library(openxlsx);
library(ggtern);
library(reshape2)
# Read and filter data
grain <- read.xlsx("data/grain_israel.xlsx", sep=";")
# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>%
filter(!grepl('Average', Sample)) %>%
filter(!grepl('test', Sample))
# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)
# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)
# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
names(grain)[names(grain) == "Group.1"] <- "Sample"
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"
# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}
# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[5:7], 2, normalize))
# Ternary plots
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
labs(title="Clay silt sand") +
theme_rgbw() +
theme_nomask() +
geom_point(size=0) +
geom_text(aes(label=rownames(grain_01)), size=3)
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
labs(title="Clay silt sand") +
theme_nomask() +
geom_mask() +
geom_point(size=2)
# Reshaping data to long form
grain_long <- melt(grain[c(1,14:113)], id = "Sample")
ggplot(grain_long, aes(x=variable, y=value, color=Sample, group=Sample)) +
geom_line() +
ggtitle("Grain size distribution curves")
# Stacked barplot
grain_long2 <- melt(grain[c(1,9:13)], id = "Sample")
ggplot(grain_long2, aes(x = reorder(Sample, value), y = value, fill = variable)) +
geom_bar(stat = 'identity', position = 'fill') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.
# Libraries
library(openxlsx);
library(dplyr);
library(ggplot2);
library(ggrepel); # Preventing labels from overlapping
library(reshape2) # Melt-function for reshaping data to long form
loi <- read.xlsx("data/loi_israel.xlsx", sep=";")
tga <- read.xlsx("data/tga_israel.xlsx", sep=";")
# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))
# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)
# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name
# Creating new columns for analysis by subtracting crucible mass;
# "dry" (mass after drying), "c550" (mass after 550 C), "c950" (mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)
loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100
# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100
# Transforming data to long form
loi_long <- loi[c(8,13:14)]
loi_long <- melt(loi_long, id = "context")
tga_long <- tga[c(8,10:11)]
tga_long <- melt(tga_long, id = "context")
# Long form data for stacked barplots
loi_long2 <- melt(loi[c(1,13:14)], value.name = "Temp", id= "sample")
tga_long2 <- melt(tga[c(1,10:11)], value.name = "Temp", id= "Name")
LOI dataset:
loi[c(8:9,13:14)]
## context type c550 c950
## AH-1 Ad Halom lime plaster (fine) 3.636174 29.250009
## AH-2 Ad Halom floor 1.946490 9.981413
## AH-3 Ad Halom lime plaster (coarse) 4.192486 19.321298
## AH-4 Ad Halom mudbrick 2.542779 4.414064
## AH-5 Ad Halom mudbrick 1.471627 7.026742
## AH-6 Ad Halom floor plaster 1.774556 40.504665
## AH-7 Ad Halom lime plaster (coarse) 4.941322 18.079922
## AH-8 Ad Halom floor 2.262738 15.868588
## AH-9 Ad Halom lime plaster (fine) 1.674261 33.459065
## TI-1 Tell Iztabba mudbrick 2.056555 42.038495
## TI-2 Tell Iztabba mudbrick 1.847381 42.608634
## TI-3 Tell Iztabba mudbrick 3.595561 29.149144
## TI-4 Tell Iztabba mudbrick 4.892311 29.255047
## TI-5 Tell Iztabba mudbrick 3.460312 27.795385
## TI-6 Tell Iztabba mudbrick 3.169395 33.345746
## TI-7 Tell Iztabba mudbrick 2.381649 40.992167
## TI-8 Tell Iztabba mudbrick 3.088721 26.134740
## TI-10 Tell Iztabba mudbrick 2.061447 41.299332
## RLZ-1 Rishon Le'Zion mudbrick 2.364044 3.727290
TGA dataset:
tga[c(8:9,10:11)]
## context type c550 c950
## AH-4 Ad Halom mudbrick 2.789774 4.486895
## AH-5 Ad Halom mudbrick 1.106870 4.457738
## RLZ-1 Rishon Le'Zion mudbrick 2.604865 4.169125
## TI-1 Tell Iztabba mudbrick 1.893905 42.031107
## TI-2 Tell Iztabba mudbrick 1.707268 42.691722
## TI-3 Tell Iztabba mudbrick 3.102582 30.945771
## TI-4 Tell Iztabba mudbrick 4.991016 30.111368
## TI-5 Tell Iztabba mudbrick 3.279413 28.194114
## TI-6 Tell Iztabba mudbrick 2.922239 34.000000
## TI-7 Tell Iztabba mudbrick 2.051722 41.455573
## TI-8 Tell Iztabba mudbrick 2.931780 29.661181
## TI-10 Tell Iztabba mudbrick 2.014504 41.949927
# Colored boxplots: Trad LOI
ggplot(loi_long, aes(x = variable, y = value, color = context)) +
geom_boxplot() +
labs(title="Traditional LOI (Ad-Halom non-MB samples are included!)",
x ="Temperature", y = "LOI")
# Colored boxplots: TGA LOI
ggplot(tga_long, aes(x = variable, y = value, color = context)) +
geom_boxplot() +
labs(title="Thermogravimeter LOI (Only MB)",
x ="Temperature", y = "LOI")
# Color by context biplot: Trad LOI
ggplot(loi,
aes(c550, c950, label = rownames(loi), colour = factor(context))) +
geom_point(size=2, aes(shape = factor(type))) +
geom_text_repel(size=3) +
labs(title = "Traditional LOI - color by context",
color = "Context",
x = "Organic LOI (%)",
y = "Carbonate LOI (%)") +
theme(axis.title = element_text())
# Color by context biplot: TGA
ggplot(tga,
aes(c550, c950, label = rownames(tga), colour = factor(context))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "TGA LOI - color by context",
color = "Context",
x = "Organic LOI (%)",
y = "Carbonate LOI (%)") +
theme(axis.title = element_text())
# Trad LOI stacked barplot
ggplot(loi_long2, aes(x = reorder(sample, Temp), y = Temp, fill = variable)) +
geom_bar(stat = 'identity', position = 'stack') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
labs(title="Traditional LOI",
fill="Temperature",
x = "",
y = "LOI (%)")
# TGA LOI stacked barplot
ggplot(tga_long2, aes(x = reorder(Name, Temp), y = Temp, fill = variable)) +
geom_bar(stat = 'identity', position = 'stack') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
labs(title="Thermogravimeter LOI",
fill="Temperature",
x = "",
y = "LOI (%)")
# Libraries
library(dplyr);
library(ggplot2);
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)
color1 <- read.table("data/color_israel.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
## L a b
## Min. :43.75 Min. : 3.160 Min. :13.85
## 1st Qu.:56.08 1st Qu.: 3.770 1st Qu.:14.71
## Median :64.30 Median : 4.350 Median :15.85
## Mean :63.19 Mean : 6.649 Mean :17.27
## 3rd Qu.:71.51 3rd Qu.: 6.280 3rd Qu.:17.70
## Max. :75.34 Max. :19.530 Max. :25.84
ggplot(color,
aes(a, b, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "Color (a*b)") +
xlab("a D65") +
ylab("b D65")
ggplot(color1,
aes(X, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "Luminance") +
ylab("L* (D65)") +
xlab("Sample number")
ggplot(color,
aes(a, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "A versus luminance") +
ylab("L* (D65)") +
xlab("a")
ggplot(color,
aes(b, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "B versus luminance") +
ylab("L* (D65)") +
xlab("b")
# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L,
xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
phi = 0, bty = "g", type = "h",
ticktype = "detailed", pch = 19, cex = 1.2)
text3D(color$a, color$b, color$L, labels = rownames(color),
add = TRUE, colkey = FALSE, cex = 0.7)
scatter3D(color$L, color$a, color$b,
xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
phi = 0, bty = "g", type = "h",
ticktype = "detailed", pch = 19, cex = 1.2)
text3D(color$L, color$a, color$b, labels = rownames(color),
add = TRUE, colkey = FALSE, cex = 0.7)
Intro about the site
From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:
Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)
The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.
MB_table <- matrix(c("LA54/4", "30 cm x 18 cm", "Top row - weathered",
"LA54/4", "25 cm x 18 cm", "Mid 4th row from the top",
"LA54/4", "35 cm x 18 cm (est.)", "B. 8th row from the top",
"LA54/4", "45 cm x 18 cm", "9th row from the top",
"LA54/4", "46 cm x 16 cm", "6th row from the top",
"LA59/2", "45 cm x 15 cm (est.)", "Top row - weathered",
"LA59/2", "45 cm x 15 cm (est.)", "3rd row from the top",
"LA59/2", "33 cm x 15 cm (est.)", "5th row below stones",
"LA59/2", "44 cm x 13 cm", "9th row of the top",
"LA59/2", "40 cm x 20 cm", "B. below disturbance under the marl",
"LA54/7", "14 cm x 15 cm (est.)", "(no additional info)"),
ncol=3, byrow=TRUE)
colnames(MB_table) <- c("Context", "Size", "Row")
rownames(MB_table) <- c("PP-9", "PP-10", "PP-11", "PP-12", "PP-13", "PP-14", "PP-15", "PP-16", "PP-17", "PP-18", "PP-19")
MB_table <- as.table(MB_table)
library(knitr)
kable(MB_table)
| Context | Size | Row | |
|---|---|---|---|
| PP-9 | LA54/4 | 30 cm x 18 cm | Top row - weathered |
| PP-10 | LA54/4 | 25 cm x 18 cm | Mid 4th row from the top |
| PP-11 | LA54/4 | 35 cm x 18 cm (est.) | B. 8th row from the top |
| PP-12 | LA54/4 | 45 cm x 18 cm | 9th row from the top |
| PP-13 | LA54/4 | 46 cm x 16 cm | 6th row from the top |
| PP-14 | LA59/2 | 45 cm x 15 cm (est.) | Top row - weathered |
| PP-15 | LA59/2 | 45 cm x 15 cm (est.) | 3rd row from the top |
| PP-16 | LA59/2 | 33 cm x 15 cm (est.) | 5th row below stones |
| PP-17 | LA59/2 | 44 cm x 13 cm | 9th row of the top |
| PP-18 | LA59/2 | 40 cm x 20 cm | B. below disturbance under the marl |
| PP-19 | LA54/7 | 14 cm x 15 cm (est.) | (no additional info) |
# Libraries
library(openxlsx);
library(dplyr);
library(ggfortify); # PCA autoplots
library(FactoMineR); # Fast PCA graphs
library(plot3D); # 3D plots
library(dendextend); # Dendrograms
# Read the raw data
pxrf <- read.xlsx("data/pxrf_PP.xlsx", sep=";")
# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))
# Remove unused columns
pxrf <- pxrf %>%
select(-Site) %>%
select(-'Other;name') %>%
select(-'File;#') %>%
select(-DateTime) %>%
select(-Application) %>%
select(-Method) %>%
select(-ElapsedTime) %>%
select(-'Cal;Check')
# Coercing "< LOD" -results to NA:s
pxrf[4:93] <- pxrf[4:93] %>% mutate_if(is.character,as.numeric)
# Transforming NA values to zeros
pxrf[is.na(pxrf)] <- 0
# Averaging results by "Sample", "Area" and "Type" columns
pxrf_averaged <- aggregate(pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)
# Removing old (now empty) columns
pxrf_averaged <- pxrf_averaged %>% select(-Sample)
pxrf_averaged <- pxrf_averaged %>% select(-Area)
pxrf_averaged <- pxrf_averaged %>% select(-Type)
# Assigning sample names as new row names
rownames(pxrf_averaged) <- pxrf_averaged$Group.1
pxrf_averaged <- pxrf_averaged %>% select(-Group.1)
# Renaming the other two newly created groups back to "Type" and "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.2"] <- "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.3"] <- "Type"
# Removing error columns from the scaled analysis data set
pxrf_scaled <- pxrf_averaged %>% select(-ends_with("Err"))
# Scaling the data with standard z-score method
pxrf_scaled[3:47] <- scale(pxrf_scaled[3:47])
# Removing columns that only contain NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- pxrf_scaled %>% select_if(all_na)
pxrf_averaged: All averaged data including averaged errors (Uine’s note: for later use) pxrf_errors: Averaged data including averaged errors, missing and dubious elements emitted
pxrf_errors <- pxrf_averaged %>% select_if(all_na)
pxrf_errors <- pxrf_errors %>%
select(-Al2O3) %>%
select(-'Al2O3;Err') %>%
select(-CaO) %>%
select(-'CaO;Err') %>%
select(-SiO2) %>%
select(-'SiO2;Err') %>%
select(-Cl) %>%
select(-'Cl;Err') %>%
select(-K2O) %>%
select(-'K2O;Err') %>%
select(-Ni) %>%
select(-'Ni;Err') %>%
select(-P2O5) %>%
select(-'P2O5;Err') %>%
select(-S) %>%
select(-'S;Err') %>%
select(-V) %>%
select(-'V;Err') %>%
select(-As) %>%
select(-'As;Err') %>%
select(-Rh) %>%
select(-'Rh;Err') %>%
select(-Ag) %>%
select(-'Cr;Err') %>%
select(-'Co;Err') %>%
select(-'Se;Err') %>%
select(-Nb) %>%
select(-'Nb;Err') %>%
select(-'Mo;Err') %>%
select(-'Pd;Err') %>%
select(-'Cd;Err') %>%
select(-'Sn;Err') %>%
select(-'Sb;Err') %>%
select(-'Ba;Err') %>%
select(-'La;Err') %>%
select(-'Ag;Err') %>%
select(-'Ce;Err') %>%
select(-'Hf;Err') %>%
select(-'Ta;Err') %>%
select(-'W;Err') %>%
select(-'Pt;Err') %>%
select(-'Au;Err') %>%
select(-'Hg;Err') %>%
select(-'Tl;Err') %>%
select(-Pb) %>%
select(-'Pb;Err') %>%
select(-'Bi;Err') %>%
select(-'Th;Err') %>%
select(-'U;Err')
pxrf_errors[3:22] <- (pxrf_errors[3:22] * 1000)
pxrf_errors[3:22] <- round(pxrf_errors[3:22], digits = 1)
kable(pxrf_errors[3:22])
| MgO | MgO;Err | Ti | Ti;Err | Cr | Mn | Mn;Err | Fe | Fe;Err | Co | Cu | Cu;Err | Zn | Zn;Err | Se | Rb | Rb;Err | Sr | Sr;Err | Y | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| PP-10 | 3703.8 | 922.4 | 195.4 | 3.9 | 1.5 | 39.0 | 2.6 | 2213.5 | 11.9 | 0 | 11.3 | 0.7 | 3.7 | 0.4 | 0 | 1.5 | 0.4 | 39.0 | 0.6 | 1.4 |
| PP-11 | 4009.8 | 1062.1 | 229.5 | 4.3 | 1.0 | 38.5 | 2.8 | 2386.6 | 13.0 | 0 | 5.5 | 0.5 | 2.5 | 0.3 | 0 | 1.6 | 0.3 | 29.0 | 0.5 | 1.1 |
| PP-12 | 2918.5 | 719.6 | 126.3 | 3.2 | 0.0 | 17.8 | 1.7 | 1005.8 | 7.3 | 0 | 5.6 | 0.6 | 3.2 | 0.5 | 0 | 1.8 | 0.5 | 28.3 | 0.8 | 0.5 |
| PP-13 | 4547.7 | 1093.6 | 192.8 | 4.0 | 0.0 | 38.9 | 2.6 | 2131.2 | 12.0 | 0 | 11.3 | 0.7 | 5.0 | 0.4 | 0 | 2.1 | 0.3 | 48.4 | 0.7 | 1.5 |
| PP-9 | 4369.6 | 1200.2 | 191.5 | 3.9 | 0.6 | 41.8 | 2.7 | 2448.1 | 12.7 | 0 | 7.7 | 0.6 | 3.9 | 0.4 | 0 | 2.1 | 0.3 | 46.8 | 0.6 | 1.5 |
| PP-19 | 4164.6 | 853.1 | 96.9 | 3.3 | 0.0 | 11.9 | 1.7 | 961.7 | 7.8 | 0 | 4.5 | 0.6 | 1.7 | 0.4 | 0 | 0.9 | 0.4 | 31.8 | 0.7 | 0.3 |
| PP-14 | 4430.4 | 1051.3 | 218.8 | 4.2 | 0.0 | 33.5 | 2.6 | 2224.1 | 12.5 | 0 | 6.7 | 0.6 | 2.4 | 0.3 | 0 | 1.7 | 0.3 | 35.1 | 0.6 | 1.4 |
| PP-15 | 4088.3 | 1177.5 | 182.1 | 3.7 | 0.0 | 27.1 | 2.3 | 1878.0 | 11.1 | 0 | 7.3 | 0.7 | 3.2 | 0.5 | 0 | 1.9 | 0.4 | 35.3 | 0.7 | 1.6 |
| PP-16 | 5124.8 | 1222.8 | 192.2 | 3.9 | 0.0 | 39.8 | 2.6 | 2300.3 | 12.5 | 0 | 9.7 | 0.7 | 3.9 | 0.4 | 0 | 1.7 | 0.4 | 37.7 | 0.6 | 1.7 |
| PP-17 | 3273.7 | 960.4 | 170.9 | 3.6 | 0.7 | 28.6 | 2.2 | 1762.4 | 10.1 | 0 | 6.3 | 0.6 | 3.4 | 0.4 | 0 | 1.9 | 0.4 | 28.8 | 0.6 | 1.3 |
| PP-18 | 3359.0 | 969.9 | 161.8 | 4.0 | 0.0 | 20.8 | 2.2 | 1491.6 | 10.5 | 0 | 4.7 | 0.6 | 1.9 | 0.3 | 0 | 1.2 | 0.3 | 17.9 | 0.5 | 0.9 |
| PP-1 | 3378.9 | 877.4 | 136.8 | 3.4 | 4.4 | 24.7 | 2.0 | 1295.9 | 8.8 | 0 | 5.7 | 0.6 | 3.4 | 0.5 | 0 | 2.0 | 0.4 | 33.9 | 0.7 | 1.4 |
| PP-2 | 4717.6 | 1441.6 | 155.8 | 3.4 | 0.7 | 44.0 | 2.7 | 2338.2 | 12.1 | 0 | 5.9 | 0.5 | 3.2 | 0.3 | 0 | 2.1 | 0.3 | 34.0 | 0.5 | 1.7 |
| PP-3 | 3290.0 | 816.9 | 143.1 | 3.3 | 0.0 | 31.7 | 2.2 | 1677.0 | 9.8 | 0 | 6.6 | 0.6 | 4.5 | 0.5 | 0 | 2.1 | 0.4 | 40.0 | 0.7 | 1.6 |
| PP-4 | 3165.8 | 1123.0 | 181.1 | 4.2 | 0.8 | 42.7 | 2.7 | 2308.1 | 12.1 | 0 | 5.6 | 0.5 | 3.8 | 0.4 | 0 | 2.1 | 0.3 | 35.9 | 0.6 | 1.6 |
| PP-5 | 4450.8 | 1561.7 | 155.4 | 3.4 | 0.0 | 47.0 | 2.8 | 2435.1 | 12.4 | 0 | 5.3 | 0.5 | 3.7 | 0.3 | 0 | 2.3 | 0.3 | 40.9 | 0.6 | 1.8 |
| PP-6 | 3694.8 | 1296.6 | 203.2 | 3.6 | 0.0 | 52.2 | 2.8 | 2555.2 | 12.4 | 0 | 5.8 | 0.5 | 3.1 | 0.4 | 0 | 2.9 | 0.3 | 24.0 | 0.5 | 2.2 |
| PP-7 | 4780.3 | 1956.9 | 237.1 | 3.9 | 9.1 | 56.3 | 3.1 | 3061.0 | 13.8 | 0 | 5.7 | 0.5 | 4.0 | 0.4 | 0 | 3.6 | 0.3 | 31.0 | 0.5 | 2.6 |
| PP-8 | 4738.8 | 1249.7 | 175.4 | 3.7 | 7.8 | 36.3 | 2.6 | 1912.4 | 11.3 | 0 | 6.0 | 0.6 | 3.1 | 0.4 | 0 | 2.2 | 0.3 | 28.4 | 0.5 | 2.1 |
pxrf_all: All “possible” elements (the numbers below are the amount of NA values)
colSums(is.na(pxrf_all[-c(1:2)]))
## MgO Al2O3 SiO2 P2O5 S Cl K2O CaO Ti V Cr Mn Fe
## 0 0 0 0 0 0 0 0 0 0 0 0 0
## Ni Cu Zn As Rb Sr Y Zr Nb Mo Rh Ag Ba
## 0 0 0 0 0 0 0 0 0 0 0 0 0
## Ta Pb
## 0 0
pxrf_final: Final analysis data set with only the selected elements
# Dropping the dubious elements and elements with too many NA values
pxrf_final <- pxrf_all %>%
select(-Al2O3) %>%
select(-CaO) %>%
select(-SiO2) %>%
select(-Cl) %>%
select(-K2O) %>%
select(-Ni) %>%
select(-P2O5) %>%
select(-S) %>%
select(-As) %>%
select(-Rh) %>%
select(-Ag) %>%
select(-MgO) %>%
select(-Cu) %>%
select(-Cr) %>%
select(-Nb) %>%
select(-Mo) %>%
select(-Ba) %>%
select(-Ta) %>%
select(-Pb)
# Transforming remaining NA values to zeros for analysis
pxrf_final[is.na(pxrf_final)] <- 0
# Final analysis data set
colnames(pxrf_final[-c(1:2)])
## [1] "Ti" "V" "Mn" "Fe" "Zn" "Rb" "Sr" "Y" "Zr"
pxrf_MB: Final analysis data set for MUDBRICK samples only (= no soil)
pxrf_MB <- pxrf_final[c(1:11), ]
Only mudbrick samples, K-means cluster analysis with 5 possible clusters
# PC autoplot with K-means clusters
set.seed(1)
autoplot(kmeans(pxrf_MB[3:10], 5), data = pxrf_MB, label = TRUE, label.size = 3)
PCA with only mudbrick samples:
# Elements
colnames(pxrf_MB[-c(1:2)])
## [1] "Ti" "V" "Mn" "Fe" "Zn" "Rb" "Sr" "Y" "Zr"
# PCA analysis
pca_1 <- prcomp(pxrf_MB[3:10])
summary(pca_1)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.357 1.2283 0.64529 0.39890 0.37065 0.28064 0.11355
## Proportion of Variance 0.706 0.1917 0.05291 0.02022 0.01746 0.01001 0.00164
## Cumulative Proportion 0.706 0.8977 0.95063 0.97084 0.98830 0.99831 0.99994
## PC8
## Standard deviation 0.02103
## Proportion of Variance 0.00006
## Cumulative Proportion 1.00000
# PCA plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Palaepaphos elements")
autoplot(pca_1, data=pxrf_MB, colour='Area', shape = FALSE, label = TRUE, main = "PCA Palaepaphos grouped by area")
PCA(pxrf_MB[3:10])
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 11 individuals, described by 8 variables
## *The results are available in the following objects:
##
## name description
## 1 "$eig" "eigenvalues"
## 2 "$var" "results for the variables"
## 3 "$var$coord" "coord. for the variables"
## 4 "$var$cor" "correlations variables - dimensions"
## 5 "$var$cos2" "cos2 for the variables"
## 6 "$var$contrib" "contributions of the variables"
## 7 "$ind" "results for the individuals"
## 8 "$ind$coord" "coord. for the individuals"
## 9 "$ind$cos2" "cos2 for the individuals"
## 10 "$ind$contrib" "contributions of the individuals"
## 11 "$call" "summary statistics"
## 12 "$call$centre" "mean of the variables"
## 13 "$call$ecart.type" "standard error of the variables"
## 14 "$call$row.w" "weights for the individuals"
## 15 "$call$col.w" "weights for the variables"
#3D plot
scores = as.data.frame(pca_1$x)
scatter3D(scores$PC2, scores$PC3, scores$PC1,
xlab = "PC2 (23,6 %)", ylab = "PC3 (5,5 %)", zlab="PC1 (64,2 %)",
phi = 0, bty = "g", type = "h",
ticktype = "detailed", pch = 19, cex = 1.2)
text3D(scores$PC2, scores$PC3, scores$PC1, labels = rownames(scores),
add = TRUE, colkey = FALSE, cex = 0.7)
PCA with soil samples:
# Elements
colnames(pxrf_final[3:10])
## [1] "Ti" "V" "Mn" "Fe" "Zn" "Rb" "Sr" "Y"
# PCA analysis
pca_3 <- prcomp(pxrf_final[3:10])
summary(pca_3)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.1106 1.2470 1.1462 0.53191 0.40063 0.34364 0.32313
## Proportion of Variance 0.5568 0.1944 0.1642 0.03537 0.02006 0.01476 0.01305
## Cumulative Proportion 0.5568 0.7512 0.9155 0.95082 0.97089 0.98565 0.99870
## PC8
## Standard deviation 0.1019
## Proportion of Variance 0.0013
## Cumulative Proportion 1.0000
# PCA plots
biplot(pca_3, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Palaepaphos elements")
autoplot(pca_3, data=pxrf_final, colour='Area', shape = FALSE, label = TRUE, main = "PCA Palaepaphos grouped by area")
autoplot(pca_3, data=pxrf_final, colour='Type', shape = FALSE, label = TRUE, main = "PCA Palaepaphos grouped by sample type")
PCA(pxrf_final[3:10])
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 19 individuals, described by 8 variables
## *The results are available in the following objects:
##
## name description
## 1 "$eig" "eigenvalues"
## 2 "$var" "results for the variables"
## 3 "$var$coord" "coord. for the variables"
## 4 "$var$cor" "correlations variables - dimensions"
## 5 "$var$cos2" "cos2 for the variables"
## 6 "$var$contrib" "contributions of the variables"
## 7 "$ind" "results for the individuals"
## 8 "$ind$coord" "coord. for the individuals"
## 9 "$ind$cos2" "cos2 for the individuals"
## 10 "$ind$contrib" "contributions of the individuals"
## 11 "$call" "summary statistics"
## 12 "$call$centre" "mean of the variables"
## 13 "$call$ecart.type" "standard error of the variables"
## 14 "$call$row.w" "weights for the individuals"
## 15 "$call$col.w" "weights for the variables"
HCA including only MB samples, colored by area:
# HCA dendrogram, samples color coded by area:
dend <-
pxrf_MB %>% # data
dist %>% # calculate a distance matrix
hclust(method = "ward.D2") %>% # hierarchical clustering
as.dendrogram %>% # turn the object into a dendrogram
highlight_branches_col
area <- as.factor(pxrf_MB[, 1])
n_area <- length(unique(area))
cols_a <- colorspace::rainbow_hcl(n_area, c = 70, l = 50)
col_area <- cols_a[area]
labels_colors(dend) <- col_area[order.dendrogram(dend)]
plot(dend, main="HCA with sample areas")
rect.dendrogram(dend, 5, border = "Black", lty = 5)
legend("topright", legend = levels(area), fill = cols_a)
HCA with soil samples, colored by type:
# HCA dendrogram, samples color coded by type:
dend3 <-
pxrf_final %>% # data
dist %>% # calculate a distance matrix
hclust(method = "ward.D2") %>% # hierarchical clustering
as.dendrogram %>% # turn the object into a dendrogram
highlight_branches_col
type <- as.factor(pxrf_final[, 2])
n_type <- length(unique(type))
cols_t <- colorspace::rainbow_hcl(n_type, c = 70, l = 50)
col_type <- cols_t[type]
labels_colors(dend3) <- col_type[order.dendrogram(dend3)]
plot(dend3, main="HCA with all samples by type")
rect.dendrogram(dend3, 7, border = "Black", lty = 5)
legend("topright", legend = levels(type), fill = cols_t)
# Libraries
library(dplyr);
library(openxlsx);
library(ggtern);
library(reshape2)
# Read and filter data
grain <- read.xlsx("data/grain_PP.xlsx", sep=";")
# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>%
filter(!grepl('Average', Sample)) %>%
filter(!grepl('test', Sample))
# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)
# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)
# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
names(grain)[names(grain) == "Group.1"] <- "Sample"
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"
# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}
# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[5:7], 2, normalize))
# Creating subsets for only mudbricks
MB_grain <- grain_01[c(1:11),]
MB_context <- grain[c(1:11),]
# Ternary plots
ggtern(data=MB_grain, aes(x=Clay, y=Sand, z=Silt, color=MB_context$Context)) +
labs(title="Clay silt sand") +
theme_rgbw() +
theme_nomask() +
geom_point(size=0) +
geom_text(aes(label=rownames(MB_grain)), size=3)
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
labs(title="Clay silt sand") +
theme_nomask() +
geom_mask() +
geom_point(size=2)
# Reshaping data to long form
grain_long <- melt(grain[c(1,14:113)], id = "Sample")
ggplot(grain_long, aes(x=variable, y=value, color=Sample, group=Sample)) +
geom_line() +
ggtitle("Grain size distribution curves")
# Only MB samples
grain_MB2 <- grain[c(1:11),]
grain_long3 <- melt(grain_MB2[c(1,14:113)], id = "Sample")
ggplot(grain_long3, aes(x=variable, y=value, color=Sample, group=Sample)) +
geom_line() +
ggtitle("Grain size distribution curves - MB only")
Barplots for 5 size fractions:
# Stacked barplot
grain_long2 <- melt(grain[c(1,9:13)], id = "Sample")
ggplot(grain_long2, aes(x = reorder(Sample, value), y = value, fill = variable)) +
geom_bar(stat = 'identity', position = 'stack') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
ggtitle("Grain size distribution by fractions")
# Stacked barplot
grain_long4 <- melt(grain_MB2[c(1,9:13)], id = "Sample")
ggplot(grain_long4, aes(x = reorder(Sample, value), y = value, fill = variable)) +
geom_bar(stat = 'identity', position = 'stack') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
ggtitle("Grain size distribution by fractions - MB only")
We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.
# Libraries
library(openxlsx);
library(dplyr);
library(ggplot2);
library(ggrepel); # Preventing labels from overlapping
library(reshape2) # Melt-function for reshaping data to long form
loi <- read.xlsx("data/loi_PP.xlsx", sep=";")
tga <- read.xlsx("data/tga_PP.xlsx", sep=";")
# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))
# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)
# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name
# Creating new columns for analysis by subtracting crucible mass;
# "dry" (mass after drying), "c550" (mass after 550 C), "c950" (mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)
loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100
# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100
# Creating MB only subsets
loi_MB <- subset(loi[9:19, ])
tga_MB <- subset(tga[9:20, ])
tga_MB <- subset(tga_MB[-4, ]) # No duplicates
tga_MB1 <- subset(tga[9:22, ]) # Includes some duplicates
# Transforming data to long form (MB only)
loi_MB_long <- loi_MB[c(8,13:14)]
loi_MB_long <- melt(loi_MB_long, id = "context")
tga_MB_long <- tga_MB1[c(8,10:11)]
tga_MB_long <- melt(tga_MB_long, id = "context")
# Transforming data to long form (Soil included)
loi_long <- loi[c(8,13:14)]
loi_long <- melt(loi_long, id = "context")
tga_long <- tga[c(8,10:11)]
tga_long <- melt(tga_long, id = "context")
# Long form data for stacked barplots
loi_long2 <- melt(loi_MB[c(1,13:14)], value.name = "Temp", id= "sample")
tga_long2 <- melt(tga_MB[c(1,10:11)], value.name = "Temp", id= "Name")
LOI dataset:
loi[c(8:9,13:14)]
## context type c550 c950
## PP-1 Hadjuabudllah l1 soil layer 1 5.986894 19.67880
## PP-2 Hadjuabudllah l2 soil layer 2 4.466405 17.59870
## PP-3 Laona soil l1 soil layer 1 5.734589 21.55943
## PP-4 Laona soil l2 soil layer 2 4.323061 17.76702
## PP-5 Laona soil l3 soil layer 3 6.877261 19.61697
## PP-6 Laona soil l1 soil layer 1 5.038672 17.28507
## PP-7 Laona soil l2 soil layer 2 5.582797 17.71488
## PP-8 Laona soil l3 soil layer 3 5.134295 19.63221
## PP-9 LA54:4 mudbrick 7.451505 18.35791
## PP-10 LA54:4 mudbrick 3.557623 14.55842
## PP-11 LA54:4 mudbrick 4.002255 21.24757
## PP-12 LA54:4 mudbrick 2.797378 22.23702
## PP-13 LA54:4 mudbrick 3.156025 21.81049
## PP-14 LA59:2 mudbrick 2.818483 22.65698
## PP-15 LA59:2 mudbrick 3.822946 25.15246
## PP-16 LA59:2 mudbrick 2.365256 21.25664
## PP-17 LA59:2 mudbrick 3.860836 20.58203
## PP-18 LA59:2 mudbrick 2.451871 33.21076
## PP-19 LA54:7 mudbrick 1.739797 33.78112
## PP-1_2 Hadjuabudllah l1 soil layer 1 6.137732 21.18259
## PP-2_2 Hadjuabudllah l2 soil layer 2 4.429557 18.22193
## PP-3_2 Laona soil l1 soil layer 1 5.521160 22.61670
## PP-4_2 Laona soil l2 soil layer 2 4.582459 20.95611
## PP-5_2 Laona soil l3 soil layer 3 6.888629 20.42575
## PP-6_2 Laona soil l1 soil layer 1 5.348806 16.70897
## PP-7_2 Laona soil l2 soil layer 2 5.537857 18.92266
## PP-8_2 Laona soil l3 soil layer 3 5.230428 19.99844
TGA dataset:
tga[c(8:9,10:11)]
## context type c550 c950
## PP-1 Hadjuabudllah l1 soil layer 1 5.328983 22.05629
## PP-2 Hadjuabudllah l2 soil layer 2 3.798409 18.86880
## PP-3 Laona soil l1 soil layer 1 4.452588 21.57446
## PP-4 Laona soil l2 soil layer 2 4.537622 20.07908
## PP-5 Laona soil l3 soil layer 3 4.657919 23.10036
## PP-6 Laona soil l1 soil layer 1 4.892221 16.89988
## PP-7 Laona soil l2 soil layer 2 4.601116 18.47942
## PP-8 Laona soil l3 soil layer 3 4.525488 20.99294
## PP-9 LA54:4 mudbrick 5.392111 18.62861
## PP-10 LA54:4 mudbrick 2.360845 15.24474
## PP-11 LA54:4 mudbrick 2.773246 24.52314
## PP-11 (2) LA54:4 mudbrick 3.164817 20.66002
## PP-12 LA54:4 mudbrick 2.734021 21.45383
## PP-13 LA54:4 mudbrick 3.235943 21.34319
## PP-14 LA59:2 mudbrick 3.202329 22.49060
## PP-15 LA59:2 mudbrick 3.508931 22.92802
## PP-16 LA59:2 mudbrick 2.215719 19.75916
## PP-17 LA59:2 mudbrick 3.921377 22.16794
## PP-18 LA59:2 mudbrick 3.234501 31.05334
## PP-19 LA54:7 mudbrick 2.204428 33.79251
## PP-10 (2) LA54:4 mudbrick 2.226568 14.54839
## PP-9 (2) LA54:4 mudbrick 5.258741 19.01388
## PP-8 (2) Laona soil l3 soil layer 3 4.474886 22.50903
## PP-7 (2) Laona soil l3 soil layer 2 4.739991 19.00483
## PP-6 (2) Laona soil l1 soil layer 1 4.618851 18.27756
## PP-5 (2) Laona soil l3 soil layer 3 5.122344 22.59504
## PP-4 (2) Laona soil l2 soil layer 2 4.559860 20.33749
## PP-3 (2) Laona soil l1 soil layer 1 4.302926 20.35644
# Colored boxplots: Trad LOI, MB only
ggplot(loi_MB_long, aes(x = variable, y = value, color = context)) +
geom_boxplot() +
labs(title="Traditional LOI (MB only)",
x ="Temperature", y = "LOI")
# Colored boxplots: TGA LOI, MB only
ggplot(tga_MB_long, aes(x = variable, y = value, color = context)) +
geom_boxplot() +
labs(title="Thermogravimeter LOI (MB only)",
x ="Temperature", y = "LOI")
# Colored boxplots: Trad LOI, soil samples included
ggplot(loi_long, aes(x = variable, y = value, color = context)) +
geom_boxplot() +
labs(title="Traditional LOI (soil included)",
x ="Temperature", y = "LOI")
# Colored boxplots: TGA LOI, soil samples included
ggplot(tga_long, aes(x = variable, y = value, color = context)) +
geom_boxplot() +
labs(title="Thermogravimeter LOI (soil included)",
x ="Temperature", y = "LOI")
# MB only biplot: Trad LOI
ggplot(loi_MB,
aes(c550, c950, label = rownames(loi_MB), color = factor(context))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "Traditional LOI: MB only, organic vs. carbonate content",
color = "Context",
x = "Organic LOI (%)",
y = "Carbonate LOI (%)") +
theme(axis.title = element_text())
# MB only biplot: TGA LOI
ggplot(tga_MB,
aes(c550, c950, label = rownames(tga_MB), colour = factor(context))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "TGA LOI: MB only, organic vs. carbonate content",
color = "Context",
x = "Organic LOI (%)",
y = "Carbonate LOI (%)") +
theme(axis.title = element_text())
# Color by context biplot: Trad LOI
ggplot(loi,
aes(c550, c950, label = rownames(loi), colour = factor(context))) +
geom_point(size=2, aes(shape = factor(type))) +
geom_text_repel(size=3) +
labs(title = "Traditional LOI, soil included - color by context",
color = "Context",
x = "Organic LOI (%)",
y = "Carbonate LOI (%)") +
theme(axis.title = element_text())
# Color by context biplot: TGA
ggplot(tga,
aes(c550, c950, label = rownames(tga), colour = factor(context))) +
geom_point(size=2, aes(shape = factor(type))) +
geom_text_repel(size=3) +
labs(title = "TGA LOI, soil included - color by context",
color = "Context",
x = "Organic LOI (%)",
y = "Carbonate LOI (%)") +
theme(axis.title = element_text())
# Trad LOI stacked barplot
ggplot(loi_long2, aes(x = reorder(sample, Temp), y = Temp, fill = variable)) +
geom_bar(stat = 'identity', position = 'stack') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
labs(title="Traditional LOI (MB only)",
fill="Temperature",
x = "",
y = "LOI (%)")
# TGA LOI stacked barplot
ggplot(tga_long2, aes(x = reorder(Name, Temp), y = Temp, fill = variable)) +
geom_bar(stat = 'identity', position = 'stack') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
labs(title="Thermogravimeter LOI (MB only)",
fill="Temperature",
x = "",
y = "LOI (%)")
# Libraries
library(dplyr);
library(ggplot2);
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)
color1 <- read.table("data/color_PP.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
## L a b
## Min. :62.38 Min. : 7.27 Min. :19.90
## 1st Qu.:63.47 1st Qu.: 8.50 1st Qu.:21.36
## Median :64.19 Median : 9.34 Median :21.72
## Mean :65.50 Mean : 9.29 Mean :21.92
## 3rd Qu.:65.78 3rd Qu.:10.56 3rd Qu.:22.96
## Max. :72.00 Max. :10.90 Max. :23.90
ggplot(color,
aes(a, b, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "Color") +
xlab("a D65") +
ylab("b D65")
ggplot(color1,
aes(X, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "Luminance") +
ylab("L* (D65)") +
xlab("Sample number")
ggplot(color,
aes(a, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "Luminance") +
ylab("L* (D65)") +
xlab("a")
ggplot(color,
aes(b, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "Luminance") +
ylab("L* (D65)") +
xlab("b")
# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L,
xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
phi = 0, bty = "g", type = "h",
ticktype = "detailed", pch = 19, cex = 1.2)
text3D(color$a, color$b, color$L, labels = rownames(color),
add = TRUE, colkey = FALSE, cex = 0.7)
scatter3D(color$L, color$a, color$b,
xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
phi = 0, bty = "g", type = "h",
ticktype = "detailed", pch = 19, cex = 1.2)
text3D(color$L, color$a, color$b, labels = rownames(color),
add = TRUE, colkey = FALSE, cex = 0.7)
### Combined results
Combined PCA and HCA: including geochemistry and LOI data
# Libraries
library(openxlsx);
library(dplyr);
library(ggfortify); # PCA autoplots
library(FactoMineR); # Fast PCA graphs
library(plot3D); # 3D plots
library(dendextend); # Dendrograms
# Read the raw data
pxrf <- read.xlsx("data/combo_PP.xlsx", sep=";")
# Transforming remaining NA values to zeros for analysis
pxrf[is.na(pxrf)] <- 0
pxrf[4:16] <- scale(pxrf[4:16])
pxrf_MB <- pxrf[c(9:19), ]
rownames(pxrf_MB) <- pxrf_MB$Sample
PCA & HCA Combided geochemistry + LOI
# PCA analysis
pca_1 <- prcomp(pxrf_MB[4:13])
summary(pca_1)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.5575 1.2647 0.92468 0.76958 0.52297 0.37607 0.32250
## Proportion of Variance 0.6453 0.1578 0.08436 0.05843 0.02698 0.01395 0.01026
## Cumulative Proportion 0.6453 0.8031 0.88749 0.94592 0.97290 0.98686 0.99712
## PC8 PC9 PC10
## Standard deviation 0.16348 0.04660 0.01791
## Proportion of Variance 0.00264 0.00021 0.00003
## Cumulative Proportion 0.99975 0.99997 1.00000
# PCA plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA (combined results) Palaepaphos elements")
autoplot(pca_1, data=pxrf_MB, colour='Area', shape = FALSE, label = TRUE, main = "PCA (combined results) Palaepaphos grouped by area")
#3D plot
scores = as.data.frame(pca_1$x)
scatter3D(scores$PC2, scores$PC3, scores$PC1,
xlab = "PC2 (21,3 %)", ylab = "PC3 (8,4 %)", zlab="PC1 (60,8 %)",
phi = 0, bty = "g", type = "h",
ticktype = "detailed", pch = 19, cex = 1.2)
text3D(scores$PC2, scores$PC3, scores$PC1, labels = rownames(scores),
add = TRUE, colkey = FALSE, cex = 0.7)
# HCA dendrogram, samples color coded by area:
dend <-
pxrf_MB %>% # data
dist %>% # calculate a distance matrix
hclust(method = "ward.D2") %>% # hierarchical clustering
as.dendrogram %>% # turn the object into a dendrogram
highlight_branches_col
area <- as.factor(pxrf_MB[, 2])
n_area <- length(unique(area))
cols_a <- colorspace::rainbow_hcl(n_area, c = 70, l = 50)
col_area <- cols_a[area]
labels_colors(dend) <- col_area[order.dendrogram(dend)]
plot(dend, main="HCA with sample areas")
rect.dendrogram(dend, 6, border = "Black", lty = 5)
legend("topright", legend = levels(area), fill = cols_a)
Intro about the site
From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:
Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)
The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.
# Libraries
library(openxlsx);
library(dplyr);
library(ggfortify); # PCA autoplots
library(FactoMineR); # Fast PCA graphs
library(dendextend); # Dendrograms
# Read the raw data
pxrf <- read.xlsx("data/pxrf_AA.xlsx", sep=";")
# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))
# Remove unused columns
pxrf <- pxrf %>%
select(-Site) %>%
select(-'Other;name') %>%
select(-'File;#') %>%
select(-DateTime) %>%
select(-Application) %>%
select(-Method) %>%
select(-ElapsedTime) %>%
select(-'Cal;Check')
# Coercing "< LOD" -results to NA:s
pxrf[4:93] <- pxrf[4:93] %>% mutate_if(is.character,as.numeric)
# Transforming NA values to zeros
pxrf[is.na(pxrf)] <- 0
# Averaging results by "Sample", "Area" and "Type" columns
pxrf_averaged <- aggregate(pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)
# Removing old (now empty) columns
pxrf_averaged <- pxrf_averaged %>% select(-Sample)
pxrf_averaged <- pxrf_averaged %>% select(-Area)
pxrf_averaged <- pxrf_averaged %>% select(-Type)
# Assigning sample names as new row names
rownames(pxrf_averaged) <- pxrf_averaged$Group.1
pxrf_averaged <- pxrf_averaged %>% select(-Group.1)
# Renaming the other two newly created groups back to "Type" and "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.2"] <- "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.3"] <- "Type"
# Removing error columns from the scaled analysis data set
pxrf_scaled <- pxrf_averaged %>% select(-ends_with("Err"))
# Scaling the data with standard z-score method
pxrf_scaled[3:47] <- scale(pxrf_scaled[3:47])
# Removing columns that only contain NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- pxrf_scaled %>% select_if(all_na)
pxrf_all: All possible elements (the numbers below are the amount of NA values)
colSums(is.na(pxrf_all[-c(1:2)]))
## MgO Al2O3 SiO2 P2O5 S Cl K2O CaO Ti V Cr Mn Fe
## 0 0 0 0 0 0 0 0 0 0 0 0 0
## Co Ni Cu Zn As Rb Sr Y Zr Nb Ag Sn Ba
## 0 0 0 0 0 0 0 0 0 0 0 0 0
## Ta Pb Bi
## 0 0 0
pxrf_final: Final analysis data set with only the selected elements
# Dropping the dubious elements and elements with too many NA values
pxrf_final <- pxrf_all %>%
select(-Al2O3) %>%
select(-CaO) %>%
select(-SiO2) %>%
select(-Cl) %>%
select(-K2O) %>%
select(-Ni) %>%
select(-P2O5) %>%
select(-S) %>%
select(-As) %>%
select(-Nb) %>%
select(-Cr) %>%
select(-Ba) %>%
select(-MgO) %>%
select(-Cu) %>%
select(-Co) %>%
select(-Ag) %>%
select(-Sn) %>%
select(-Ta) %>%
select(-Pb) %>%
select(-Bi)
# Transforming remaining NA values to zeros for analysis
pxrf_final[is.na(pxrf_final)] <- 0
# Final analysis data set
colnames(pxrf_final[-c(1:2)])
## [1] "Ti" "V" "Mn" "Fe" "Zn" "Rb" "Sr" "Y" "Zr"
# PC autoplot with K-means clusters
set.seed(1)
autoplot(kmeans(pxrf_final[3:10], 4), data = pxrf_final, label = TRUE, label.size = 3)
PCA with final elements:
# Elements
colnames(pxrf_final[-c(1:2)])
## [1] "Ti" "V" "Mn" "Fe" "Zn" "Rb" "Sr" "Y" "Zr"
# PCA analysis
pca_1 <- prcomp(pxrf_final[3:10])
summary(pca_1)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.3415 1.1244 0.80489 0.58457 0.43484 0.23991 0.12562
## Proportion of Variance 0.6853 0.1580 0.08098 0.04272 0.02364 0.00719 0.00197
## Cumulative Proportion 0.6853 0.8434 0.92434 0.96706 0.99069 0.99789 0.99986
## PC8
## Standard deviation 0.03365
## Proportion of Variance 0.00014
## Cumulative Proportion 1.00000
# PCA plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Artaxata elements")
autoplot(pca_1, data=pxrf_final, colour='Area', shape = FALSE, label = TRUE, main = "PCA Artaxata grouped by area")
PCA(pxrf_final[3:10])
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 10 individuals, described by 8 variables
## *The results are available in the following objects:
##
## name description
## 1 "$eig" "eigenvalues"
## 2 "$var" "results for the variables"
## 3 "$var$coord" "coord. for the variables"
## 4 "$var$cor" "correlations variables - dimensions"
## 5 "$var$cos2" "cos2 for the variables"
## 6 "$var$contrib" "contributions of the variables"
## 7 "$ind" "results for the individuals"
## 8 "$ind$coord" "coord. for the individuals"
## 9 "$ind$cos2" "cos2 for the individuals"
## 10 "$ind$contrib" "contributions of the individuals"
## 11 "$call" "summary statistics"
## 12 "$call$centre" "mean of the variables"
## 13 "$call$ecart.type" "standard error of the variables"
## 14 "$call$row.w" "weights for the individuals"
## 15 "$call$col.w" "weights for the variables"
# HCA dendrogram, samples color coded by area:
dend <-
pxrf_final %>% # data
dist %>% # calculate a distance matrix
hclust(method = "ward.D2") %>% # hierarchical clustering
as.dendrogram %>% # turn the object into a dendrogram
highlight_branches_col
area <- as.factor(pxrf_final[, 1])
n_area <- length(unique(area))
cols_a <- colorspace::rainbow_hcl(n_area, c = 70, l = 50)
col_area <- cols_a[area]
labels_colors(dend) <- col_area[order.dendrogram(dend)]
plot(dend, main="HCA with sample areas (Copper included)")
rect.dendrogram(dend, 5, border = "Black", lty = 5)
legend("topright", legend = levels(area), fill = cols_a)
# Libraries
library(dplyr);
library(openxlsx);
library(ggtern);
library(reshape2)
# Read and filter data
grain <- read.xlsx("data/grain_AA.xlsx", sep=";")
# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>%
filter(!grepl('Average', Sample)) %>%
filter(!grepl('test', Sample))
# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)
# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)
# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
names(grain)[names(grain) == "Group.1"] <- "Sample"
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"
# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}
# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[5:7], 2, normalize))
# Ternary plots
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
labs(title="Clay silt sand") +
theme_rgbw() +
theme_nomask() +
geom_point(size=0) +
geom_text(aes(label=rownames(grain_01)), size=3)
# Reshaping data to long form
grain_long <- melt(grain[c(1,14:113)], id = "Sample")
ggplot(grain_long, aes(x=variable, y=value, color=Sample, group=Sample)) +
geom_line() +
ggtitle("Grain size distribution curves")
# Stacked barplot
grain_long2 <- melt(grain[c(1,9:13)], id = "Sample")
ggplot(grain_long2, aes(x = reorder(Sample, value), y = value, fill = variable)) +
geom_bar(stat = 'identity', position = 'fill') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.
# Libraries
library(openxlsx);
library(dplyr);
library(ggplot2);
library(ggrepel); # Preventing labels from overlapping
library(reshape2) # Melt-function for reshaping data to long form
loi <- read.xlsx("data/loi_AA.xlsx", sep=";")
tga <- read.xlsx("data/tga_AA.xlsx", sep=";")
# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))
# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)
# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name
# Creating new columns for analysis by subtracting crucible mass;
# "dry" (mass after drying), "c550" (mass after 550 C), "c950" (mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)
loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100
# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100
# Transforming data to long form
loi_long <- loi[c(8,13:14)]
loi_long <- melt(loi_long, id = "context")
tga_long <- tga[c(8,10:11)]
tga_long <- melt(tga_long, id = "context")
# Long form data for stacked barplots
loi_long2 <- melt(loi[c(1,13:14)], value.name = "Temp", id= "sample")
tga_long2 <- melt(tga[c(1,10:11)], value.name = "Temp", id= "Name")
LOI dataset:
loi[c(8:9,13:14)]
## context type c550 c950
## AA-1 Hill II mudbrick 5.610986 5.756990
## AA-2 Hill II mudbrick 4.711074 7.965070
## AA-3 Hill II mudbrick 4.035368 7.841600
## AA-4 Urartian silo mudbrick 3.080904 7.738517
## AA-5 Phase I mudbrick 2.940399 6.257292
## AA-6 Phase I mudbrick 3.125140 5.507166
## AA-7 Phase II or III mudbrick 4.573474 6.267972
## AA-8 Phase II or III mudbrick 3.769098 7.238074
## AA-9 Phase II mudbrick 3.696483 6.966079
## AA-10 Phase II mudbrick 5.181930 5.580024
TGA dataset:
tga[c(8:9,10:11)]
## context type c550 c950
## AA-1 Hill II mudbrick 5.811508 5.611934
## AA-2 Hill II mudbrick 4.751337 8.599873
## AA-3 Hill II mudbrick 4.078462 7.997570
## AA-4 Urartian silo mudbrick 3.403442 7.086302
## AA-4 (2) Urartian silo mudbrick 3.415174 7.167431
## AA-5 Phase I mudbrick 2.799218 6.532557
## AA-6 Phase I mudbrick 3.118409 4.097341
## AA-7 Phase II or III mudbrick 4.395924 6.082521
## AA-8 Phase II or III mudbrick 3.689788 6.722017
## AA-9 Phase II mudbrick 4.417015 6.467449
## AA-10 Phase II mudbrick 4.307988 6.154440
# Colored boxplots: Trad LOI
ggplot(loi_long, aes(x = variable, y = value, color = context)) +
geom_boxplot() +
labs(title="Traditional LOI",
color = "Context",
x ="Temperature",
y = "LOI")
# Colored boxplots: TGA LOI
ggplot(tga_long, aes(x = variable, y = value, color = context)) +
geom_boxplot() +
labs(title="Thermogravimeter LOI",
color = "Context",
x ="Temperature",
y = "LOI")
# Color by context biplot: Trad LOI
ggplot(loi,
aes(c550, c950, label = rownames(loi), colour = factor(context))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "Traditional LOI - color by context",
color = "Context",
x = "Organic LOI (%)",
y = "Carbonate LOI (%)") +
theme(axis.title = element_text())
# Color by context biplot: TGA
ggplot(tga,
aes(c550, c950, label = rownames(tga), colour = factor(context))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "TGA LOI - color by context",
color = "Context",
x = "Organic LOI (%)",
y = "Carbonate LOI (%)") +
theme(axis.title = element_text())
# Trad LOI stacked barplot
ggplot(loi_long2, aes(x = reorder(sample, Temp), y = Temp, fill = variable)) +
geom_bar(stat = 'identity', position = 'stack') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
labs(title="Traditional LOI",
fill="Temperature",
x = "",
y = "LOI (%)")
# TGA LOI stacked barplot
ggplot(tga_long2, aes(x = reorder(Name, Temp), y = Temp, fill = variable)) +
geom_bar(stat = 'identity', position = 'stack') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
labs(title="Thermogravimeter LOI",
fill="Temperature",
x = "",
y = "LOI (%)")
# Libraries
library(dplyr);
library(ggplot2);
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)
color1 <- read.table("data/color_AA.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
## L a b
## Min. :51.37 Min. :3.880 Min. :15.92
## 1st Qu.:57.73 1st Qu.:4.402 1st Qu.:16.49
## Median :58.79 Median :4.555 Median :16.93
## Mean :58.18 Mean :5.067 Mean :17.32
## 3rd Qu.:59.58 3rd Qu.:4.695 3rd Qu.:17.32
## Max. :61.57 Max. :8.670 Max. :21.47
ggplot(color,
aes(a, b, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "Color (a*b)") +
xlab("a D65") +
ylab("b D65")
ggplot(color1,
aes(X, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "Luminance") +
ylab("L* (D65)") +
xlab("Sample number")
ggplot(color,
aes(a, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "A versus luminance") +
ylab("L* (D65)") +
xlab("a")
ggplot(color,
aes(b, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "B versus luminance") +
ylab("L* (D65)") +
xlab("b")
# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L,
xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
phi = 0, bty = "g", type = "h",
ticktype = "detailed", pch = 19, cex = 1.2)
text3D(color$a, color$b, color$L, labels = rownames(color),
add = TRUE, colkey = FALSE, cex = 0.7)
scatter3D(color$L, color$a, color$b,
xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
phi = 0, bty = "g", type = "h",
ticktype = "detailed", pch = 19, cex = 1.2)
text3D(color$L, color$a, color$b, labels = rownames(color),
add = TRUE, colkey = FALSE, cex = 0.7)